package cn.yoyo;

import cn.yoyo.entity.Blog;
import com.alibaba.excel.EasyExcel;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Demo that scrapes blog entries from paginated listing pages and exports
 * them to an Excel file.
 *
 * @Author: yoyo
 * @Since: 2022/8/25 15:04
 */
public class BlogDemo {

    /** URL prefix of listing pages 2..n; the page number is appended to it. */
    private static final String PAGED_URL_PREFIX = "https://www.aquanliang.com/blog/page/";

    /** CSS class of the element that contains the blog entries of a listing page. */
    private static final String ENTRY_CONTAINER_CLASS = "_1ySUUwWwmubujD8B44ZDzy";

    /**
     * Scrapes the first 59 listing pages, prints every scraped entry and
     * writes all of them to a single Excel sheet.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Fetch the data of the first n pages.
        List<Blog> list = getInfo("https://www.aquanliang.com/blog/", 59);
        for (Blog blog : list) {
            System.out.println(blog.toString());
        }
        EasyExcel.write("D:\\worko_Olll\\圈量SCRMBlog3.xlsx", Blog.class)
                .sheet()
                .doWrite(list);
    }

    /**
     * Downloads {@code page} consecutive listing pages and converts every blog
     * entry found on them into a {@link Blog}.
     *
     * <p>Scraping is best effort: a page that cannot be downloaded and an entry
     * that cannot be parsed are reported on {@code System.err} and skipped, so a
     * single bad page or entry does not discard the rest of the result.
     *
     * @param url  URL of the first listing page. Pages 2..n are always fetched
     *             from {@link #PAGED_URL_PREFIX}, independent of this value.
     * @param page number of listing pages to fetch; values below 1 fetch nothing
     * @return the entries that were parsed successfully, in page order; never {@code null}
     */
    public static List<Blog> getInfo(String url, int page) {
        List<Blog> blogList = new ArrayList<>();
        // Use a local for the current page URL instead of reassigning the parameter.
        String pageUrl = url;
        for (int i = 1; i <= page; i++) {
            try {
                Document doc = Jsoup.connect(pageUrl).get();
                Elements main = doc.getElementsByClass(ENTRY_CONTAINER_CLASS);
                Elements spanList = main.select("span");
                for (Element span : spanList) {
                    try {
                        blogList.add(parseBlog(span, i));
                    } catch (IOException | NumberFormatException | IndexOutOfBoundsException e) {
                        // Not every <span> has the expected layout, a numeric view
                        // count and a usable image URL. Skip only this entry rather
                        // than aborting the page or the whole run.
                        System.err.println("Skipping unparsable entry on page " + i
                                + " (" + pageUrl + "): " + e);
                    }
                }
            } catch (IOException e) {
                // The page could not be downloaded; report which one and continue.
                System.err.println("Failed to fetch page " + i + " (" + pageUrl + ")");
                e.printStackTrace();
            }
            pageUrl = PAGED_URL_PREFIX + (i + 1);
        }
        return blogList;
    }

    /**
     * Builds a {@link Blog} from one {@code <span>} of the listing page.
     *
     * <p>The positional lookups ({@code get(8)}, {@code get(2)}, {@code get(7)})
     * mirror the page markup at the time this demo was written.
     * NOTE(review): these indexes are fragile and should be re-checked against
     * the live markup whenever entries start being skipped.
     *
     * @param span the element wrapping a single blog entry
     * @param page 1-based number of the listing page the entry was found on
     * @return the populated blog entry
     * @throws IOException               if the cover image URL is empty or malformed
     * @throws NumberFormatException     if the view count is not an integer
     * @throws IndexOutOfBoundsException if the entry has fewer nested {@code <div>}s than expected
     */
    private static Blog parseBlog(Element span, int page) throws IOException {
        // First level of <div>s of the blog entry.
        Elements div = span.select("div");
        // Second level of <div>s.
        // NOTE(review): select("div") also matches nested descendants, so this
        // likely returns the same set as above; confirm before simplifying.
        Elements div2 = div.select("div");
        // <a> tags inside the second-level <div>s.
        Elements a = div2.select("a");
        String title = a.select("div").text();
        // Third level: the <div> holding the entry's meta information.
        Element div3 = div2.select("div").get(8);
        Elements metaDivs = div3.select("div");
        String publishDate = metaDivs.get(2).text();
        Integer pageview = Integer.valueOf(metaDivs.get(7).text());
        // "abs:" makes jsoup resolve a relative src against the page URL, so that
        // new URL(...) below does not reject relative image paths.
        String imagePath = span.select("img").attr("abs:src");

        Blog blog = new Blog();
        blog.setPage(page);
        blog.setTitle(title);
        blog.setPageview(pageview);
        blog.setPublishDate(publishDate);
        blog.setSurfacePlot(new URL(imagePath));
        return blog;
    }

}
